﻿import os
import sys
import Dumper

# Column identifiers. COL_WORD, COL_IPA and COL_DEF are the keys of the
# record dict built by GSL_CN_Parser; COL_WORD is additionally passed to
# Dumper.subConfigOutput by getSetByFile.
COL_GSL = 'GSL'  # not referenced by the code visible in this file
COL_WORD = 'word'
COL_DEF = 'def'
COL_IPA = 'IPA'
# Values 50 through 55. Only referenced by the disabled 'split' entry of
# SET_GSL_CN, so it currently has no effect.
SPLIT_FACTOR = range(50, 56)

# Configuration consumed by getSetByFile:
#   'file'     - path of the source word list to read
#   'parser'   - name of the module-level function applied to every line
#   'title'    - handed to Dumper.FileDumper
#   'template' - forwarded to Dumper as part of the whole mapping
#                (presumably output metadata; confirm against Dumper)
SET_GSL_CN = {
    'file': Dumper.encodeStr('General_Service_List_CN A_E.txt'),
    'parser': Dumper.encodeStr('GSL_CN_Parser'),
    'title': Dumper.encodeStr('General Service List 中文释义 A-E'),
    # Disabled: 'split': {'factor': SPLIT_FACTOR},
    'template': {
        'tags': 'English, EFL, ESL, GSL',
        'defs_lang': 'zh-CN',
    },
}
def convertIPA(ipa):
    """Return ``ipa`` with its ASCII phonetic codes replaced by Unicode symbols.

    Every occurrence of a code character listed below is substituted with
    the corresponding Unicode character; all other characters are left
    untouched.
    """
    # None of the replacement symbols is itself a code character, so the
    # order in which the substitutions are applied does not affect the result.
    substitutions = (
        ('5', u'\u02C8'),  # primary stress mark
        ('7', u'\u02CC'),  # secondary stress mark
        (':', u'\u02D0'),  # length mark
        ('A', u'\u00E6'),
        ('B', u'\u0251'),
        ('C', u'\u0254'),  # same symbol as 'R'?
        ('E', u'\u0259'),
        ('F', u'\u0283'),
        ('I', u'\u026A'),
        ('J', u'\u028A'),
        ('N', u'\u014B'),
        ('Q', u'\u028C'),
        ('R', u'\u0254'),  # same symbol as 'C'?
        ('T', u'\u00F0'),
        ('V', u'\u0292'),
        ('W', u'\u03B8'),
        ('Z', u'\u025B'),
    )
    converted = ipa
    for code, symbol in substitutions:
        converted = converted.replace(code, symbol)
    return converted

def GSL_CN_Parser(line):
    """Split one ``word [ipa] definition`` line into a record dict.

    The text before the first '[' becomes the word, the span from that '['
    through the first ']' (brackets included) is run through convertIPA,
    and everything after the ']' becomes the definition. Each piece is
    stripped of surrounding whitespace and passed through Dumper.encodeStr.

    Returns a dict keyed by COL_WORD, COL_IPA and COL_DEF.
    Raises ValueError (from str.index) when the line lacks '[' or ']'.
    """
    bracket_open = line.index('[')
    headword = Dumper.encodeStr(line[:bracket_open].strip())

    # One past the first ']' so the closing bracket stays in the IPA field.
    bracket_end = line.index(']') + 1
    phonetic = Dumper.encodeStr(line[bracket_open:bracket_end].strip())
    pronunciation = convertIPA(phonetic)

    meaning = Dumper.encodeStr(line[bracket_end:].strip())
    return {COL_WORD: headword, COL_IPA: pronunciation, COL_DEF: meaning}

def getFileLines(filename):
    """Return all lines of the file at ``filename`` as a list.

    Line terminators are kept, exactly as file.readlines() returns them.
    Any error from open() or readlines() propagates to the caller.
    """
    # The context manager closes the handle even when readlines() raises,
    # which the previous explicit open()/close() pair did not guarantee.
    with open(filename) as f:
        return f.readlines()

def getSetByFile(setConfig):
    """Parse the word list described by ``setConfig`` and pass it to Dumper.

    ``setConfig`` is a mapping from which this function reads:
      'file'   - path of the text file to load via getFileLines
      'parser' - name of a function defined in this module; it is called
                 once per line and its return values are collected in order
      'title'  - argument for Dumper.FileDumper
    The whole mapping is also forwarded to Dumper.subConfigOutput together
    with the parsed records, COL_WORD and the dumper instance.

    Raises AttributeError when this module defines no attribute named
    setConfig['parser']; errors from the parser or from file access
    propagate unchanged.
    """
    lines = getFileLines(setConfig['file'])
    # Resolve the parser once instead of once per line. sys.modules is used
    # rather than __import__(__name__) because the latter returns the
    # top-level package when this module lives inside a package.
    parser = getattr(sys.modules[__name__], setConfig['parser'])
    words = [parser(line) for line in lines]
    dumper = Dumper.FileDumper(setConfig['title'])
    Dumper.subConfigOutput(None, setConfig, words, None, COL_WORD, dumper)

# Script entry point: process the SET_GSL_CN configuration only when this
# file is executed directly, not when it is imported.
if __name__ == '__main__':
    getSetByFile(SET_GSL_CN)
